//do-file for analyzing retention policy counterfactual


//get tax data ready
import delimited "$data/Taxsim/ally.csv", clear
keep if year == 2000 | year == 2008
gen tax = st_avg
keep state_id tax state_abv year
replace year = 2010 if year==2008


replace state_abv = subinstr(state_abv, `"""',  "", .)
drop if state_id == 52 | state_id == 9
ren state_abv state
replace state = trim(state)
merge m:1 state using "$data/Crosswalks/state_fips_crosswalk", keep(match) nogen
keep tax statefips year
order statefips tax
sort statefips
//keep tax

//now nab state sales taxes
preserve
import excel "$data/taxsim/state_sales_tax_2.xlsx", clear
keep B D
drop if _n<9
drop if _n>51
replace D = "0" if strpos(D, "NO")
destring D, replace force
replace D = 6.1 if B == "Utah"
replace D = 5.2 if B == "Virginia"
ren D salestax
ren B state_full
replace state_full = "California" if strpos(state_full, "California")
merge 1:1 state_full using "$data/Crosswalks/state_fips_crosswalk", keep(match) nogen
keep statefips salestax
tempfile sales
save `sales'
restore

merge m:1 statefips using `sales', keep(match) nogen
replace sales = sales/100
replace tax = tax + sales
drop salestax
drop if year==.
collapse (mean) tax, by(statefips)
gen statenum = _n
save "$temp/state_tax_means", replace


import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear
ren v1 state_born
ren v2 state_move
drop v3 v4
ren v5 parent_marr
ren v6 kid_marr
ren v7 parent_time_inv
ren v8 kid_earn_p2
ren v9 kid_earn_p3
ren v10 kid_hc_p3
ren v11 ability
ren v12 coll_par
ren v13 coll_kid
ren v14 weight
ren v15 mig
ren v16 type
ren v17 mig2
ren v18 l2
ren v19 state_p2
ren v20 race
ren v21 region
ren v22 util
ren v23 parent_fam_earn
ren v24 kid_fam_earn
ren v25 parent_pctile
ren v26 kid_pctile
gen uniqid = _n
save "$temp/cfact_retention_individual_base", replace


//compute college shares and earnings for all states in periods 2 and 3

//period 2:
preserve
keep l2 coll_kid kid_earn_p2 weight
ren l2 statenum
merge m:1 statenum using "$temp/state_tax_means", keep(match) nogen
gen taxrev = kid_earn_p2 * tax
collapse (sum) taxrev (mean) coll_kid [w=weight], by(statefips)
ren taxrev taxrev_p2_baseline
ren coll_kid coll_share_p2_baseline
keep statefips coll_share taxrev
save "$temp/state_retention_stats_p2_baseline", replace
restore


//period 3
keep state_move coll_kid kid_fam_earn weight kid_marr
ren state_move statefips
merge m:1 statefips using "$temp/state_tax_means", keep(match) nogen
gen taxrev = (kid_fam_earn) * tax //period 3 earnings
replace taxrev = taxrev + (kid_fam_earn * 1.06 * 1.12)*tax //period 4 earnings

//college share
gen count = 1
replace count = 2 if kid_marr == 2
replace coll_kid = 2 if kid_marr == 2 & coll_kid == 1 //for simple approximation assuming assortive mating over education. Should even out.

collapse (sum) count coll_kid taxrev [w=weight], by(statefips)
gen coll_share_p3_baseline = coll_kid/count
ren taxrev taxrev_p3_baseline
keep statefips coll_share taxrev
save "$temp/state_retention_stats_p3_baseline", replace






//now start the process of getting all this information from the other datasets
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear
ren v1 state_born
levelsof state_born, clean local(fips)
local cfacts `"1 2 5"'
local counter 0

qui{
foreach fip in `fips'{
	local counter `++counter'
	
	foreach c in `cfacts'{
	    noi di as result "`fip' || `c'"
	    local filenum = `c' * 1000 + `counter'
		
		import delimited "$dir/Model/Simulated_data/simulated_data_cfact_retention_`filenum'.csv", clear
	ren v1 state_born
	ren v2 state_move
	ren v3 kid_marr
	ren v4 kid_earn_p2
	ren v5 l2
	ren v6 kid_earn_p3
	ren v7 coll_kid
	ren v8 weight
	ren v9 util
	ren v10 parent_fam_earn
	ren v11 kid_fam_earn
	ren v12 parent_pctile
	ren v13 kid_pctile
	replace coll_kid = coll_kid-1
		
		
		//compute college shares and earnings for RELEVANT STATE in periods 2 and 3
		
		//period 2
		preserve
		keep l2 coll_kid kid_earn_p2 weight
		ren l2 statenum
		keep if statenum == `counter' //l2 goes from 1-509
		merge m:1 statenum using "$temp/state_tax_means", keep(match) nogen
		gen taxrev_coll = kid_earn_p2 * tax if coll_kid
		replace taxrev_coll = 0 if !coll_kid
		gen taxrev_ncoll = kid_earn_p2 * tax if !coll_kid
		replace taxrev_ncoll = 0 if coll_kid
		gen taxrev = kid_earn_p2 * tax
		gen subsidy = `c' * 10000 / (47961 * 18) if coll_kid == 1 //pay out subsidy
		collapse (sum) taxrev_coll taxrev_ncoll subsidy (mean) coll_kid [w=weight], by(statefips)
		
		
		//1.6, 0.4
		ren taxrev_coll taxrev_coll_p2_`c'
		ren taxrev_ncoll taxrev_ncoll_p2_`c'
		ren coll_kid coll_share_p2_`c'
		ren subsidy subsidy_p2_`c'
		keep statefips coll_share taxrev* subsidy		
		save "$temp/state_retention_stats_p2_`fip'_`c'", replace
		restore

		//period 3
		keep state_move coll_kid kid_fam_earn weight kid_marr
		ren state_move statefips
		keep if statefips == `fip'
		merge m:1 statefips using "$temp/state_tax_means", keep(match) nogen
		gen taxrev_coll = (kid_fam_earn) * tax if coll_kid //period 3 earnings
		replace taxrev_coll = taxrev_coll + (kid_fam_earn * 1.06 * 1.12)*tax if coll_kid //period 4 earnings
		
		gen taxrev_ncoll = (kid_fam_earn) * tax if !coll_kid //period 3 earnings
		replace taxrev_ncoll = taxrev_ncoll + (kid_fam_earn * 1.06 * 1.12)*tax if !coll_kid //period 4 earnings
	
		//college share
		gen count = 1
		replace count = 2 if kid_marr == 2
		replace coll_kid = 2 if kid_marr == 2 & coll_kid == 1 //for simple approximation assuming assortive mating over education. Should even out.

		collapse (sum) count coll_kid taxrev_coll taxrev_ncoll [w=weight], by(statefips)
		gen coll_share_p3_`c' = coll_kid/count
		ren taxrev_coll taxrev_coll_p3_`c'
		ren taxrev_ncoll taxrev_ncoll_p3_`c'
		keep statefips coll_share taxrev*
		save "$temp/state_retention_stats_p3_`fip'_`c'", replace
	}
}
}


//now start the process of getting all this information from the other datasets
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear
ren v1 state_born
levelsof state_born, clean local(fips)
local cfacts `"1 2 5"'
foreach c in `cfacts'{
    clear
	foreach fip in `fips'{
	    append using "$temp/state_retention_stats_p2_`fip'_`c'"
	}
	save "$temp/state_retention_stats_p2_`c'", replace
	
	clear
	foreach fip in `fips'{
	    append using "$temp/state_retention_stats_p3_`fip'_`c'"
	}
	save "$temp/state_retention_stats_p3_`c'", replace
}

//add all together
local cfacts `"1 2 5"'
use "$temp/state_retention_stats_p2_baseline"
merge 1:1 statefips using "$temp/state_retention_stats_p3_baseline", keep(match) nogen
foreach c in `cfacts'{
    merge 1:1 statefips using "$temp/state_retention_stats_p2_`c'", keep(match) nogen
	merge 1:1 statefips using "$temp/state_retention_stats_p3_`c'", keep(match) nogen
}

save "$temp/state_retention_stats_all", replace




*********now we can begin the analysis!**********
use "$temp/state_retention_stats_all", clear
ren statefips stfips
merge m:1 stfips using "$data/Crosswalks/state_div_crosswalk", keep(match) nogen
ds taxrev*
foreach var in `r(varlist)'{
    replace `var' = 0 if stfips == 33 //adjust NH
}
gen rev_total_baseline = taxrev_p2_baseline + taxrev_p3_baseline


local cfacts `"1 2 5"'
foreach c in `cfacts'{
    //compute change in college share
	gen change_coll_share_p2_`c' = (coll_share_p2_`c' - coll_share_p2_baseline) * 100
	gen change_coll_share_p3_`c' = (coll_share_p3_`c' - coll_share_p3_baseline) * 100
	
	
	//adjust tax revenue according to Moretti
	replace taxrev_coll_p2_`c' = taxrev_coll_p2_`c' * (1 + .004 * change_coll_share_p2_`c')
	replace taxrev_coll_p3_`c' = taxrev_coll_p3_`c' * (1 + .004 * change_coll_share_p3_`c')
	
	replace taxrev_ncoll_p2_`c' = taxrev_ncoll_p2_`c' * (1 + .016 * change_coll_share_p2_`c')
	replace taxrev_ncoll_p3_`c' = taxrev_ncoll_p3_`c' * (1 + .016 * change_coll_share_p3_`c')

	
	//total revenue and percentage change
	gen rev_total_`c' = taxrev_coll_p2_`c' + taxrev_coll_p3_`c' + taxrev_ncoll_p2_`c' + taxrev_ncoll_p3_`c' - subsidy_p2_`c'
	gen change_rev_`c' = ((rev_total_`c' - rev_total_baseline)/rev_total_baseline) * 100
}

//report table
local divs `"NE MA ENC WNC SA ESC WSC MO PA"'
local cfacts `"1 2 5"'
local counter = 0
qui{
foreach div in `divs'{
	local counter `++counter'
	
	//loop over counterfactuals
	foreach cfact in `cfacts'{
		
		su change_coll_share_p3_`cfact' if div == `counter' 
		local coll`cfact' = round(`r(mean)', .01)
		
		su change_rev_`cfact' if div == `counter'
		local rev`cfact' = round(`r(mean)', .01)
	}
	
	//report in Tex-able form
	noi di as result "`div' & `coll1' & `rev1' & `coll2' & `rev2' & `coll5' & `rev5' \\ " 
}
}

save "$temp/cfact_results_retention", replace



****************Part 2

//compute fraction of college grads that were going to be in the state regardless.
use "$temp/cfact_retention_individual_base", clear
keep uniqid coll_kid state_move
save "$temp/retention_choices_baseline", replace

//for each counterfactual, keep kids in affected state with a college degree. Then see how many we can match back to original dataset.
//now start the process of getting all this information from the other datasets
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear
ren v1 state_born
levelsof state_born, clean local(fips)
local cfacts `"1 2 5"'
local counter 0

qui{
foreach fip in `fips'{
	local counter `++counter'
	
	foreach c in `cfacts'{
	    noi di as result "`fip' || `c'"
	    local filenum = `c' * 1000 + `counter'
		
		import delimited "$dir/Model/Simulated_data/simulated_data_cfact_retention_`filenum'.csv", clear
	ren v1 state_born
	ren v2 state_move
	ren v3 kid_marr
	ren v4 kid_earn_p2
	ren v5 l2
	ren v6 kid_earn_p3
	ren v7 coll_kid
	ren v8 weight
	ren v9 util
	ren v10 parent_fam_earn
	ren v11 kid_fam_earn
	ren v12 parent_pctile
	ren v13 kid_pctile
		gen uniqid = _n


	
		keep uniqid coll_kid state_move
		keep if state_move == `fip'
		keep if coll_kid == 1
		keep uniqid coll_kid state_move
		save "$temp/retention_choices_`fip'_`c'", replace	
	}	
}
}


//now start the process of getting all this information from the other datasets
import delimited "$dir/Model/Simulated_data/simulated_data_base.csv", clear
ren v1 state_born
levelsof state_born, clean local(fips)
local cfacts `"1 2 5"'
foreach c in `cfacts'{
    clear
	foreach fip in `fips'{
	    append using "$temp/retention_choices_`fip'_`c'"
	}
	save "$temp/retention_choices_`c'"
}


qui{
local cfacts `"1 2 5"'
foreach c in `cfacts'{
    use "$temp/retention_choices_`c'", clear
	merge 1:1 uniqid state_move coll_kid using "$temp/retention_choices_baseline", keep(1 3)
	noi tab _merge	
}
}


	


//
